library(abind)
library(sciplot)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(ggpubr)
library(vegan)
library(nlme)
library(car)
library(patchwork)
library(ARTool)
getwd()
[1] "/Users/saradellwilliams/Dropbox/My_Publications/SpatEpiSCTLD/SCTLDepizootiology_lowerFLkeys/NonSpatialAnalyses"
Version control: R version 4.0.2 (2020-06-22)
sessionInfo()$R.version
$platform
[1] "x86_64-apple-darwin17.0"
$arch
[1] "x86_64"
$os
[1] "darwin17.0"
$system
[1] "x86_64, darwin17.0"
$status
[1] ""
$major
[1] "4"
$minor
[1] "0.2"
$year
[1] "2020"
$month
[1] "06"
$day
[1] "22"
$`svn rev`
[1] "78730"
$language
[1] "R"
$version.string
[1] "R version 4.0.2 (2020-06-22)"
$nickname
[1] "Taking Off Again"
my.se<-function(x){
sd(x,na.rm=TRUE)/sqrt(length(x))
}
my.se.rows<-function(x){
se.tp<-c()
for(i in 1:nrow(x)){
se.tp[i]<-my.se(x[i,1:2])
}
return(se.tp)
}
my.se.cols<-function(x,a){
se.tp<-c()
for (i in a:ncol(x)){
se.tp[i]<-my.se(x[,i])
}
return(se.tp[12:32])
}
nrow(my.data)
[1] 2012
(anova(mod))
Analysis of Variance of Aligned Rank Transformed Data
Table Type: Anova Table (Type III tests)
Model: No Repeated Measures (lm)
Response: art(percent.cover)
Df Df.res F value Pr(>F)
1 Site 2 594 90.689 < 2.22e-16 ***
2 timept 1 594 38.231 1.1689e-09 ***
3 Site:timept 2 594 11.481 1.2813e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
testInteractions(artlm(mod,"Site:timept"),pairwise=c("Site","timept"))
F Test:
P-value adjustment method: holm
Value Df Sum of Sq F Pr(>F)
Midchannel-Nearshore : t1-t2 -117.15 1 343103 12.6884 0.0007948 ***
Midchannel-Offshore : t1-t2 32.72 1 26765 0.9898 0.3201949
Nearshore-Offshore : t1-t2 149.87 1 561525 20.7660 1.891e-05 ***
Residuals 594 16062123
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
my.df<-read.csv("SWG_SCTLDprogrates.csv")
kruskal.test(Avg_prweek~Site,data=my.df)
Kruskal-Wallis rank sum test
data: Avg_prweek by Site
Kruskal-Wallis chi-squared = 1.4024, df = 2, p-value = 0.496
my.df$Site<-as.factor(my.df$Site)
### subset by site
tl.s1<-subset(my.df,subset=Site==1)
tl.s2<-subset(my.df,subset=Site==2)
tl.s3<-subset(my.df,subset=Site==3)
s1means<-mean(tl.s1$Avg_prweek)
s2means<-mean(tl.s2$Avg_prweek)
s3means<-mean(tl.s3$Avg_prweek)
#standard error
s1se<-my.se(tl.s1$Avg_prweek)
s2se<-my.se(tl.s2$Avg_prweek)
s3se<-my.se(tl.s3$Avg_prweek)
tl.means<-rbind(s3means,s1means,s2means)
#tl.means
tl.se<-rbind(s3se,s1se,s2se)
tl.se[is.na(tl.se)]<-0
bp<-barplot(as.matrix((tl.means)),beside=TRUE,ylim=c(0,15),ylab=strwrap("Change in percent tissue loss per week per colony with SCTLD",width=40) ,names.arg=c("Inshore","Midchannel","Offshore"),col=c("grey","light blue","blue"))
arrows(x0=bp,x1=bp,y0=(tl.means)-1.96*(tl.se),y1=(tl.means)+1.96*(tl.se),code = 3, angle = 90, len = 0.02, xpd = NA)
legend(x = 1, y=104,legend = c("Inshore","Midchannel","Offshore"), fill =c("grey","light blue","blue"),bty="n")
No significant differences in percent tissue loss per week per colony among sites. So we don't need to include site as a factor when looking for differences in progression rates among species.
aggregate(Avg_prweek~Sps,data=my.df,FUN=mean)
Sps Avg_prweek
1 CNAT 14.743730
2 DLAB 15.526620
3 DSTO 13.012624
4 MCAV 6.480868
5 OANN 4.146442
6 OFAV 3.069835
7 PCLI 2.737089
8 PSTR 14.511162
9 SBOU 8.427431
10 SINT 2.707302
11 SSID 2.701758
my.df$Sps<-as.factor(my.df$Sps)
#my.df_noplci<-subset(my.df,subset=Sps!="PCLI")
mod<-aov((Avg_prweek)~Sps,data=my.df)
plot(mod)
not plotting observations with leverage one:
28
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.9273, p-value = 1.02e-07
#try transformations
mod<-aov(log(Avg_prweek)~Sps,data=my.df)
plot(mod)
not plotting observations with leverage one:
28
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.9709, p-value = 0.0009604
#resort to kruskal wallis
kruskal.test(Avg_prweek~Sps,data=my.df)
Kruskal-Wallis rank sum test
data: Avg_prweek by Sps
Kruskal-Wallis chi-squared = 82.974, df = 10, p-value = 1.308e-13
Significant differences among species.
sps.avg<-aggregate(Avg_prweek~Sps,data=my.df,FUN=mean)
sps.se<-aggregate(Avg_prweek~Sps,data=my.df,FUN=my.se)
par(family="Times New Roman")
bp<-barplot(as.matrix(t(sps.avg$Avg_prweek)),ylim=c(0,25),las=1,ylab="Percent loss per week",names.arg=c(as.character(sps.avg$Sps)),las=2,col="grey")
arrows(x0=bp,x1=bp,y0=(sps.avg$Avg_prweek)-1.96*(sps.se$Avg_prweek),y1=(sps.avg$Avg_prweek)+1.96*(sps.se$Avg_prweek),code = 3, angle = 90, len = 0.02, xpd = NA)
numbers<- c("37","6","37","21","11","4","1","28","8","11","12")
text(x=bp,y=1,numbers )
#text(x = bp, y =(sps.avg$Avg_prweek)+1.96*(sps.se$Avg_prweek), labels, pos = 3)
Average progression rates and total incidence among sites were negatively associated with SST and DHW from 04 January 2019 when the disease incidence was first over 5 cases to the end of the surveys on 06 December 2019.
envdis<-read.csv("extended_envdisFig3.csv")
colnames(envdis)<-c("dates","sst","bleachalert","dhw","t.anom","tl.means","tl.se","newInc")
summary(envdis)
dates sst bleachalert dhw
Length:26 Min. :24.44 Length:26 Min. :0.000
Class :character 1st Qu.:26.52 Class :character 1st Qu.:0.000
Mode :character Median :27.73 Mode :character Median :0.000
Mean :28.06 Mean :1.328
3rd Qu.:30.16 3rd Qu.:1.813
Max. :31.31 Max. :8.189
t.anom tl.means tl.se newInc
Min. :-0.1952 Min. : 0.0000 Min. :0.00000 Min. : 0.000
1st Qu.: 0.8114 1st Qu.: 0.1586 1st Qu.:0.08929 1st Qu.: 1.000
Median : 1.3769 Median : 2.3202 Median :1.55537 Median : 3.000
Mean : 1.3154 Mean : 4.8487 Mean :2.03539 Mean : 6.962
3rd Qu.: 1.5992 3rd Qu.: 7.5242 3rd Qu.:3.78488 3rd Qu.: 8.750
Max. : 3.6582 Max. :19.7993 Max. :5.64914 Max. :32.000
#incidence and dhw
mod<-lm(log(newInc)~dhw,data=envdis[10:26,])
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.96345, p-value = 0.6969
acf(mod$residuals)
cor.test(envdis$dhw[10:26],log(envdis$newInc[10:26]))
Pearson's product-moment correlation
data: envdis$dhw[10:26] and log(envdis$newInc[10:26])
t = -4.8114, df = 15, p-value = 0.0002287
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.9164826 -0.4768898
sample estimates:
cor
-0.7789808
#incidence and sst
mod<-lm(log(newInc)~sst,data=envdis[10:26,])
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.95177, p-value = 0.4851
acf(mod$residuals)
cor.test(envdis$sst[10:26],log(envdis$newInc[10:26]))
Pearson's product-moment correlation
data: envdis$sst[10:26] and log(envdis$newInc[10:26])
t = -2.6238, df = 15, p-value = 0.01917
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.8203640 -0.1098399
sample estimates:
cor
-0.5608739
#tissue loss and dhw
mod<-lm((tl.means)~dhw,data=envdis[10:26,])
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.9527, p-value = 0.5005
acf(mod$residuals)
cor.test(envdis$dhw[10:26],log(envdis$tl.means[10:26]))
Pearson's product-moment correlation
data: envdis$dhw[10:26] and log(envdis$tl.means[10:26])
t = -3.4867, df = 15, p-value = 0.003313
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.8699510 -0.2777415
sample estimates:
cor
-0.6690689
#tissue loss and sst
mod<-lm(log(tl.means)~sst,data=envdis[10:26,])
shapiro.test(mod$residuals)
Shapiro-Wilk normality test
data: mod$residuals
W = 0.93112, p-value = 0.2272
acf(mod$residuals)
cor.test(envdis$sst[10:26],log(envdis$tl.means[10:26]))
Pearson's product-moment correlation
data: envdis$sst[10:26] and log(envdis$tl.means[10:26])
t = -0.82387, df = 15, p-value = 0.4229
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.6260975 0.3028671
sample estimates:
cor
-0.2080662
log(envdis$tl.means)
[1] -Inf -Inf -Inf -Inf -Inf -0.67067433
[7] -2.60553464 -1.98673214 -1.50122433 0.54232429 1.37182233 0.99883478
[13] 1.80615243 1.88022812 2.60513947 2.98564560 2.87658317 2.75781171
[19] 2.22494913 2.06015961 0.65511978 0.04380262 0.16475523 1.19158875
[25] 1.52128953 2.09544188
colnames(envdis)<-c("dates","sst","bleachalert","dhw","t.anom","tl.means","tl.se","newInc")
tl.newI<-rbind(envdis$tl.means,envdis$newInc)
tl.newI
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0 0.5113636 0.07386364 0.1371429 0.2228571 1.72
[2,] 0 0 0 0 0 1.0000000 1.00000000 2.0000000 2.0000000 8.00
[,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18]
[1,] 3.942529 2.715116 6.086982 6.555 13.53311 19.79928 17.75351 15.76531
[2,] 7.000000 4.000000 19.000000 14.000 32.00000 19.00000 27.00000 8.00000
[,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
[1,] 9.253012 7.847222 1.925373 1.044776 1.179104 3.292308 4.578125 8.129032
[2,] 7.000000 3.000000 1.000000 1.000000 1.000000 3.000000 12.000000 9.000000
tl.newI.se<-rbind(envdis$tl.se,rep(0,times=length(tl.se)))
number of columns of result is not a multiple of vector length (arg 2)
envdis$dates<-revalue(envdis$dates, c( "X05.01.18"="05-01-18","X06.01.18"="06-01-18","X06.21.18"="06-21-18","X07.16.18"="07-16-18","X08.17.18"="08-17-18","X10.30.18"="10-30-18", "X11.9.18"="11-09-18", "X11.29.18"="11-29-18","X12.13.18"="12-13-18","X1.4.19"="01-04-19","X1.18.19"="01-18-19","X2.8.19"="02-08-19","X3.4.19"="03-04-19","X3.21.19"="03-21-19","X4.11.19"="04-11-19","X5.2.19"="05-02-19","X5.16.19"="05-16-19","X5.28.19"="05-28-19","X6.13.19"="06-13-19","X7.1.19"="07-01-19","X7.22.19"="07-22-19","X8.16.19"="08-16-19","X9.17.19"="09-17-19","X10.14.19"="10-14-19","X11.12.19"="11-12-19","X12.6.19"="12-06-19"))
The following `from` values were not present in `x`: X05.01.18, X06.01.18, X06.21.18, X07.16.18, X08.17.18, X10.30.18, X11.9.18, X11.29.18, X12.13.18, X1.4.19, X1.18.19, X2.8.19, X3.4.19, X3.21.19, X4.11.19, X5.2.19, X5.16.19, X5.28.19, X6.13.19, X7.1.19, X7.22.19, X8.16.19, X9.17.19, X10.14.19, X11.12.19, X12.6.19
envdis$dates<-as.Date(envdis$dates,"%m-%d-%y")
envdis$dates<-format(envdis$dates,"%d %b %y")
#tiff("Figure3.tiff",width=180, height=120,units="mm",res=300)
par(oma = c(0, 1, 1, 3),family="Times New Roman")
bp<-barplot(as.matrix((tl.newI)),beside=TRUE,ylim=c(0,35),las=1,ylab="Disease metric", names.arg=envdis$dates,las=2,col=c("light grey","dark grey"))
legend("topleft",cex=.7,bty="n",col=c("light grey","dark grey"),fill=c("light grey","dark grey"),legend=c("Percent coral tissue loss","Incidence"))
arrows(x0=bp,x1=bp,y0=(tl.newI)-(tl.newI.se),y1=(tl.newI)+(tl.newI.se),code = 3, angle = 90, len = 0.02, xpd = NA)
zero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skippedzero-length arrow is of indeterminate angle and so skipped
par(new = T, family="Times New Roman")
plot(bp[1,],envdis$dhw,xlab=NA,ylab=NA,axes=F,ylim=c(0,10),type="b",col="blue",pch=19,lwd=2)
axis(side = 4,col="blue",line=3, lwd=2)
#mtext(side = 4, line = 3, 'Degree Heating Week')
par(new = T)
plot(bp[1,],envdis$sst,xlab=NA,ylab=NA,axes=F,ylim=c(20,34),type="b",col="red")
axis(side = 4,col="red")
mtext(side = 3, 'SST DHW',at=86,line=1)
#dev.off()
###looking at thermal stress
my.data.ts<-read.csv("SCTLD_END_exta_ts.csv") ## this file is the same as the original, but includes columns related to thermal stress signs
dis.sps.ts<-my.data.ts%>%
filter(Sps!="AAGA",Sps!="ACER",Sps!="ATEN",Sps!="EFAS",Sps!="MANG",Sps!="MMEA",Sps!="MYCE",Sps!="OCUL",Sps!="ODIF",Sps!="PAST",Sps!="PDIV",Sps!="PPOR",Sps!="SRAD")%>%
droplevels()
dis.ts.table<-table(dis.sps.ts$tot_diseased,dis.sps.ts$tot_stressed)
chisq.test(dis.ts.table)$expected
NS S
Dis 153.8277 22.17232
Health 1185.1723 170.82768
dis.ts.table
NS S
Dis 146 30
Health 1193 163
chisq.test(dis.ts.table)
Pearson's Chi-squared test with Yates' continuity correction
data: dis.ts.table
X-squared = 3.1304, df = 1, p-value = 0.07685
my.table<-table(my.data$Sps,my.data$glom)
my.table
Healthy SCTLD
AAGA 1 0
ACER 13 0
CNAT 24 37
DLAB 7 6
DSTO 51 37
MCAV 133 21
MMEA 1 0
MYCE 2 0
OANN 22 11
OCUL 2 0
ODIF 2 0
OFAV 21 4
PAST 440 0
PCLI 4 1
PDIV 1 0
PPOR 9 0
PSTR 27 28
SBOU 42 8
SINT 392 11
SRAD 9 0
SSID 633 12
chisq.test(my.table)$expected #does not meet assumptions for chi square
Chi-squared approximation may be incorrect
Healthy SCTLD
AAGA 0.9125249 0.08747515
ACER 11.8628231 1.13717694
CNAT 55.6640159 5.33598410
DLAB 11.8628231 1.13717694
DSTO 80.3021869 7.69781312
MCAV 140.5288270 13.47117296
MMEA 0.9125249 0.08747515
MYCE 1.8250497 0.17495030
OANN 30.1133201 2.88667992
OCUL 1.8250497 0.17495030
ODIF 1.8250497 0.17495030
OFAV 22.8131213 2.18687873
PAST 401.5109344 38.48906561
PCLI 4.5626243 0.43737575
PDIV 0.9125249 0.08747515
PPOR 8.2127237 0.78727634
PSTR 50.1888668 4.81113320
SBOU 45.6262425 4.37375746
SINT 367.7475149 35.25248509
SRAD 8.2127237 0.78727634
SSID 588.5785288 56.42147117
fisher.test(my.table,simulate.p.value=T)
Fisher's Exact Test for Count Data with simulated p-value (based on 2000
replicates)
data: my.table
p-value = 0.0004998
alternative hypothesis: two.sided
#chisq.test(table(dis.sps$Sps,dis.sps$glom))$expected
Same, but for just the 11 susceptible species?
dis.sps<-my.data%>%
filter(Sps!="AAGA",Sps!="ACER",Sps!="ATEN",Sps!="EFAS",Sps!="MANG",Sps!="MMEA",Sps!="MYCE",Sps!="OCUL",Sps!="ODIF",Sps!="PAST",Sps!="PDIV",Sps!="PPOR",Sps!="SRAD")%>%
droplevels()
my.table<-table(dis.sps$Sps,dis.sps$glom)
chisq.test(my.table)$expected #does not meet assumptions for chi square
Chi-squared approximation may be incorrect
Healthy SCTLD
CNAT 53.992167 7.0078329
DLAB 11.506527 1.4934726
DSTO 77.890339 10.1096606
MCAV 136.308094 17.6919060
OANN 29.208877 3.7911227
OFAV 22.127937 2.8720627
PCLI 4.425587 0.5744125
PSTR 48.681462 6.3185379
SBOU 44.255875 5.7441253
SINT 356.702350 46.2976501
SSID 570.900783 74.0992167
fisher.test(my.table,simulate.p.value=T)
Fisher's Exact Test for Count Data with simulated p-value (based on 2000
replicates)
data: my.table
p-value = 0.0004998
alternative hypothesis: two.sided
## let's look at all under 200cm, becuase there's just one or two outliers
mwidth.200cm<-subset(dis.sps,subset=Max_width<=200)
nrow(mwidth.200cm)/nrow(dis.sps) #99.7% under 200cm
[1] 0.997389
mw.means<-aggregate(Max_width~glom,data=mwidth.200cm,FUN=mean)
#mw.means
mw.se<-aggregate(Max_width~glom,data=mwidth.200cm,FUN=function(x) sd(x)/sqrt(length(x)))
#mw.se
ci.upp <- mw.means$Max_width + 1.96 * mw.se$Max_width
#ci.upp
ci.low <- mw.means$Max_width - 1.96 * mw.se$Max_width
disnames<-c("Healthy","SCTLD")
bp <- barplot(mw.means$Max_width, beside = TRUE, names = disnames,col=c("lightgrey","darkgrey"),ylim=c(0,100),ylab="Maximum Colony Width (cm)",horiz=FALSE)
arrows(y0 = ci.low, y1 = ci.upp, x0 = bp, x1 = bp, angle = 90, code = 3, length = 0.1)
aggregate(Max_width~glom,data=mwidth.200cm,FUN=mean)
glom Max_width
1 Healthy 23.44050
2 SCTLD 38.65143
dis.w<-subset(mwidth.200cm,subset=glom=="SCTLD",select="Max_width")
health.w<-subset(mwidth.200cm,subset=glom=="Healthy",select="Max_width")
shapiro.test((dis.w$Max_width))
Shapiro-Wilk normality test
data: (dis.w$Max_width)
W = 0.8005, p-value = 3.452e-14
shapiro.test((health.w$Max_width))
Shapiro-Wilk normality test
data: (health.w$Max_width)
W = 0.63291, p-value < 2.2e-16
wilcox.test((dis.w$Max_width),(health.w$Max_width)) #significant! p-value = 8.132e-12
Wilcoxon rank sum test with continuity correction
data: (dis.w$Max_width) and (health.w$Max_width)
W = 155581, p-value = 1.069e-11
alternative hypothesis: true location shift is not equal to 0
We use the Wilcoxon rank sum test (Mann-Whitney) becuase the data do not meet assumptions. Compares the medians of two groups using ranks.
#table(mwidth.200cm$Sps,mwidth.200cm$glom)
#levels(as.factor(mwidth.200cm$Sps))
#mwidth.200cm$Sps<-as.factor(mwidth.200cm$Sps)
resmat<-matrix(NA,nrow=0,ncol=3)
dis.sps_np<-subset(dis.sps,subset=Sps!="PCLI") #take out pcli because only one colony
dis.sps_np$Sps<-as.factor(dis.sps_np$Sps)
#levels(dis.sps_np$Sps)
for (i in 1:length(levels(dis.sps_np$Sps))){
#print(levels(dis.sps_np$Sps)[i])
just.one<-subset(dis.sps_np,subset=Sps==levels(dis.sps_np$Sps)[i])
dis.w<-subset(just.one,subset=glom=="SCTLD",select="Max_width")
health.w<-subset(just.one,subset=glom=="Healthy",select="Max_width")
stat<-wilcox.test(dis.w$Max_width,health.w$Max_width)
results<-c(levels(dis.sps_np$Sps)[i], stat$statistic,stat$p.value)
resmat<-rbind(resmat,results)
}
cannot compute exact p-value with tiescannot compute exact p-value with tiescannot compute exact p-value with tiescannot compute exact p-value with tiescannot compute exact p-value with tiescannot compute exact p-value with ties
nomaxorpcli_all_results<-cbind(resmat,p.adjust(as.numeric(resmat[,3]),"hochberg"),p.adjust(as.numeric(resmat[,3]),"holm"),p.adjust(as.numeric(resmat[,3]),"bonferroni"))
results<-as.data.frame(nomaxorpcli_all_results)
colnames(results)<-c("sps","wstat", "p.val","benajmini_hochberg","holm", "bonferoni")
results
sps wstat p.val benajmini_hochberg
results CNAT 725.5 3.28247658453933e-05 0.00029542289260854
results.1 DLAB 19.5 0.883001503003956 0.970406743671115
results.2 DSTO 1449.5 1.70321971776986e-05 0.000170321971776986
results.3 MCAV 1619.5 0.240762859545797 0.963051438183188
results.4 OANN 179 0.0271590101078966 0.16295406064738
results.5 OFAV 41 0.970406743671115 0.970406743671115
results.6 PSTR 513 0.0229260558102416 0.160482390671691
results.7 SBOU 188.5 0.594540505339535 0.970406743671115
results.8 SINT 2761.5 0.108854141600871 0.544270708004355
results.9 SSID 6407.5 4.20944147602638e-05 0.00033675531808211
holm bonferoni
results 0.00029542289260854 0.000328247658453933
results.1 1 1
results.2 0.000170321971776986 0.000170321971776986
results.3 0.963051438183188 1
results.4 0.16295406064738 0.271590101078966
results.5 1 1
results.6 0.160482390671691 0.229260558102416
results.7 1 1
results.8 0.544270708004355 1
results.9 0.00033675531808211 0.000420944147602638
So there are significant differences for CNAT, DSTO, SSID
## Get Barplot
mw.means.sp<-aggregate(Max_width~Sps+glom,data=dis.sps,FUN=mean)
#mw.means.sp
mw.se.sp<-aggregate(Max_width~Sps+glom,data=dis.sps,FUN=function(x) sd(x)/sqrt(length(x)))
size.sp<-cbind(mw.means.sp,mw.se.sp$Max_width)
colnames(size.sp)<-c("sps","state","mean","se")
ggplot(size.sp,aes(x=sps,y=mean,fill=state))+
geom_bar(stat="identity",position=position_dodge(.9))+theme(panel.background = element_blank())+
geom_errorbar(aes(ymax=mean + se, ymin=mean-se),width=0.2,position=position_dodge(.9))+
theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+scale_fill_manual(values=c("Healthy" = "grey52","SCTLD"="grey28"), drop = FALSE)+
theme(legend.position="none")+scale_x_discrete(limits=c("SSID","SINT","MCAV","DSTO","CNAT","PSTR","SBOU","OANN","OFAV","DLAB","PCLI"))+
labs(x="",y="Maximum width (cm)")+
theme(legend.position = c(0.9, 0.7) ,legend.text=element_text(size=12),legend.title=element_blank(),legend.key.size =unit(0.5,"line"))+scale_y_continuous(breaks=seq(0,80,10),expand = c(0, 0),lim=c(0,80))
#progrates
#sps.avg
#size.sp
my.data.dis<-my.data%>%
filter(Sps!="AAGA",Sps!="ACER",Sps!="ATEN",Sps!="EFAS",Sps!="MANG",Sps!="MMEA",Sps!="MYCE",Sps!="OCUL",Sps!="ODIF",Sps!="PAST",Sps!="PDIV",Sps!="PPOR",Sps!="SRAD")%>%
droplevels()
head(my.data.dis)
Site Plot Sps Max_width Coral_ID coords_x coords_y X5.1.18 X6.1.18
1 1 23 DLAB 11 1_p23_t1_s0_c1_DLAB 0.448071 0.2361312 Healthy Healthy
2 1 23 SINT 22 1_p23_t1_s0_c2_SINT 0.789318 0.4790354 Healthy Healthy
3 1 23 SSID 25 1_p23_t1_s0_c3_SSID 0.833828 0.6278998 Healthy Healthy
4 1 23 DSTO 18 1_p23_t1_s0_c5_DSTO 0.462908 2.2250339 Healthy Healthy
5 1 23 CNAT 11 1_p23_t1_s0_c6_CNAT 0.151335 2.2052883 Healthy Healthy
6 1 23 SINT 16 1_p23_t1_s5_c2_SINT 0.314540 5.9977166 Healthy Healthy
X6.21.18 X7.16.18 X8.17.18 X10.30.18 X11.9.18 X11.29.18 X12.13.18 X1.4.19
1 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
2 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
3 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
4 Healthy Healthy Healthy Healthy Healthy Unknown Healthy SCTLD
5 Healthy Healthy Healthy Dead Dead Dead Dead Dead
6 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
X1.18.19 X2.8.19 X3.4.19 X3.21.19 X4.11.19 X5.2.19 X5.16.19 X5.28.19 X6.13.19
1 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
2 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
3 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
4 SCTLD SCTLD SCTLD SCTLD SCTLD SCTLD SCTLD Dead Dead
5 Dead Dead Dead Dead Dead Dead Dead Dead Dead
6 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
X7.1.19 X7.22.19 X8.16.19 X9.17.19 X10.14.19 X11.12.19 X12.6.19 total total_bin
1 Healthy Healthy Healthy Healthy Healthy Healthy Healthy 0 0
2 Healthy Healthy Healthy Healthy Healthy Healthy Healthy 0 0
3 Healthy Healthy Healthy Healthy Healthy Healthy Healthy 0 0
4 Dead Dead Dead Dead Dead Dead Dead 8 1
5 Dead Dead Dead Dead Dead Dead Dead 0 0
6 Healthy Healthy Healthy Healthy Healthy Healthy Healthy 0 0
days_dis weeks_dis glom
1 0 0.00000 Healthy
2 0 0.00000 Healthy
3 0 0.00000 Healthy
4 164 23.42857 SCTLD
5 0 0.00000 Healthy
6 0 0.00000 Healthy
timedifs<-read.csv("timedif_surveys.csv") #survey dates, number of days since last date, and running sum of days; calculated and formatted in excel
my.data.disonly<-my.data.dis%>%
filter(total_bin>0)
timedifs
survey_date days_since_last runsum
1 X10.30.18 0 0
2 X11.9.18 10 10
3 X11.29.18 20 30
4 X12.13.18 14 44
5 X1.4.19 22 66
6 X1.18.19 14 80
7 X2.8.19 21 101
8 X3.4.19 24 125
9 X3.21.19 17 142
10 X4.11.19 21 163
11 X5.2.19 21 184
12 X5.16.19 14 198
13 X5.28.19 12 210
14 X6.13.19 16 226
15 X7.1.19 18 244
16 X7.22.19 21 265
17 X8.16.19 25 290
18 X9.17.19 32 322
19 X10.14.19 27 349
20 X11.12.19 29 378
21 X12.6.19 24 402
need the new_inf_corals function
new_inf_corals<-function(df,steps,x){
## newly_I
newly_I<-matrix(0, nrow = nrow(df), ncol = steps) #blank matrix for storing newly infected corals
#newly_I
for (i in 1:steps){ #for each survey time point
col<-x+i #start with first tp after init tp
prev<-x+i-1
for (j in 1:(nrow(df))){ #for each row in the df
#print (df[j,col])
if ( df[j,col]=="SCTLD" ){ #if it's disease
#print( "found one")
if (df[j,prev]!="SCTLD"){ # and if it wasnt diseased before
newly_I[j,i]<-1 #add it to newly infected
}
}
}
}
return(newly_I)
}
head(my.data.disonly)
Site Plot Sps Max_width Coral_ID coords_x coords_y X5.1.18
1 1 23 DSTO 18 1_p23_t1_s0_c5_DSTO 0.462908 2.22503392 Healthy
2 1 23 DSTO 11 1_p23_t1_s5_c8_DSTO 0.596439 7.85750058 Healthy
3 1 23 DSTO 33 1_p23_t2_s0_c11_DSTO 1.753709 3.27072463 Healthy
4 1 23 DSTO 12 1_p23_t2_s5_c6_DSTO 1.071217 9.23473024 Healthy
5 1 23 DSTO 7 1_p23_t3_s5_c6_DSTO 2.362018 7.17633173 Healthy
6 1 23 DSTO 17 1_p23_t4_s0_c1_DSTO 3.133531 0.09741542 Healthy
X6.1.18 X6.21.18 X7.16.18 X8.17.18 X10.30.18 X11.9.18 X11.29.18 X12.13.18
1 Healthy Healthy Healthy Healthy Healthy Healthy Unknown Healthy
2 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
3 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
4 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
5 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
6 Healthy Healthy Healthy Healthy Healthy Healthy Healthy Healthy
X1.4.19 X1.18.19 X2.8.19 X3.4.19 X3.21.19 X4.11.19 X5.2.19 X5.16.19 X5.28.19
1 SCTLD SCTLD SCTLD SCTLD SCTLD SCTLD SCTLD SCTLD Dead
2 SCTLD SCTLD SCTLD SCTLD Dead Dead Dead Dead Dead
3 Healthy Healthy Healthy SCTLD SCTLD SCTLD Dead Dead Dead
4 Healthy Healthy Healthy Healthy Healthy Healthy SCTLD SCTLD SCTLD
5 Healthy Healthy Healthy SCTLD Dead Dead Dead Dead Dead
6 SCTLD SCTLD SCTLD SCTLD Dead Dead Dead Dead Dead
X6.13.19 X7.1.19 X7.22.19 X8.16.19 X9.17.19 X10.14.19 X11.12.19 X12.6.19 total
1 Dead Dead Dead Dead Dead Dead Dead Dead 8
2 Dead Dead Dead Dead Dead Dead Dead Dead 4
3 Dead Dead Dead Dead Dead Dead Dead Dead 3
4 Dead Dead Dead Dead Dead Dead Dead Dead 3
5 Dead Dead Dead Dead Dead Dead Dead Dead 1
6 Dead Dead Dead Dead Dead Dead Dead Dead 4
total_bin days_dis weeks_dis glom
1 1 164 23.428571 SCTLD
2 1 81 11.571429 SCTLD
3 1 72 10.285714 SCTLD
4 1 47 6.714286 SCTLD
5 1 24 3.428571 SCTLD
6 1 81 11.571429 SCTLD
colnames(my.data.disonly)
[1] "Site" "Plot" "Sps" "Max_width" "Coral_ID" "coords_x"
[7] "coords_y" "X5.1.18" "X6.1.18" "X6.21.18" "X7.16.18" "X8.17.18"
[13] "X10.30.18" "X11.9.18" "X11.29.18" "X12.13.18" "X1.4.19" "X1.18.19"
[19] "X2.8.19" "X3.4.19" "X3.21.19" "X4.11.19" "X5.2.19" "X5.16.19"
[25] "X5.28.19" "X6.13.19" "X7.1.19" "X7.22.19" "X8.16.19" "X9.17.19"
[31] "X10.14.19" "X11.12.19" "X12.6.19" "total" "total_bin" "days_dis"
[37] "weeks_dis" "glom"
wheninfected<-new_inf_corals(my.data.disonly,21,12) #df, steps, x
wheninfected
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[4,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[5,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[6,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[7,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0
[8,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[9,] 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[10,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[11,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[12,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0
[13,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[14,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[15,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[16,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[17,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[18,] 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[19,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[20,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[21,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[22,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[23,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0
[24,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[25,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[26,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0
[27,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[28,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[29,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[30,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[31,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[32,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0
[33,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[34,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[35,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0
[36,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[37,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[38,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[39,] 0 1 0 0 0 0 0 0 0 0 0 0 0 0
[40,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[41,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[42,] 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[43,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0
[44,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[45,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[46,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0
[47,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[,15] [,16] [,17] [,18] [,19] [,20] [,21]
[1,] 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0
[4,] 0 0 0 0 0 0 0
[5,] 0 0 0 0 0 0 0
[6,] 0 0 0 0 0 0 0
[7,] 0 0 0 0 0 0 0
[8,] 0 0 0 0 0 0 0
[9,] 0 0 0 0 0 0 0
[10,] 0 0 0 0 0 0 1
[11,] 0 0 0 0 0 0 0
[12,] 0 0 0 0 0 0 0
[13,] 0 0 0 0 0 0 0
[14,] 0 0 0 0 0 0 0
[15,] 0 0 0 0 0 0 0
[16,] 0 0 0 0 0 0 1
[17,] 0 0 0 0 0 0 0
[18,] 0 0 0 0 0 0 0
[19,] 0 0 0 0 0 0 0
[20,] 1 0 0 0 0 0 0
[21,] 0 0 0 0 0 0 0
[22,] 0 0 0 0 0 0 0
[23,] 0 0 0 0 0 0 0
[24,] 0 0 0 0 0 0 0
[25,] 0 0 0 0 0 0 0
[26,] 0 0 0 0 0 0 0
[27,] 0 0 0 0 0 0 0
[28,] 0 0 0 0 0 0 0
[29,] 0 0 0 0 0 0 0
[30,] 0 1 0 0 0 0 0
[31,] 0 0 0 0 1 0 0
[32,] 0 0 0 0 0 0 0
[33,] 0 0 0 0 0 0 0
[34,] 0 0 0 0 0 0 0
[35,] 0 0 0 0 0 0 0
[36,] 0 0 0 0 0 0 0
[37,] 0 0 0 0 0 0 0
[38,] 0 0 0 0 0 0 0
[39,] 0 0 0 0 0 0 0
[40,] 0 0 0 0 0 1 0
[41,] 0 0 0 0 0 0 0
[42,] 0 0 0 0 0 0 0
[43,] 0 0 0 0 0 0 0
[44,] 0 0 0 0 0 0 0
[45,] 0 0 0 0 0 0 0
[46,] 0 0 0 0 0 0 0
[47,] 0 0 0 0 0 0 0
[ reached getOption("max.print") -- omitted 129 rows ]
dateinf<-c()
for (i in 1:nrow(my.data.disonly)){ #for ech colony
for (j in 1:ncol(wheninfected)){ #for each date in wheninfected
if(wheninfected[i,j]==1){ #if a colony is infected
dateinf[i]<-j #the date infected is J
}
if(my.data.disonly$total[i]==0){
dateinf[i]<-"Healthy"
}
}
}
nrow(wheninfected)
[1] 176
nrow(my.data.disonly)
[1] 176
length(dateinf)
[1] 176
median(dateinf)
[1] 10.5
my.data.disonly$dateinf<-dateinf #starting in october
my.data.disonly$dateinf<-as.numeric(my.data.disonly$dateinf)
table(my.data.disonly$Site,my.data.disonly$dateinf)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
1 0 0 1 0 4 1 1 5 3 3 3 2 1 1 1 0 0 0 0 0 2
2 1 1 1 2 4 6 1 1 1 0 0 0 2 0 0 1 0 0 1 1 0
3 0 0 0 0 0 0 2 13 10 27 16 23 5 5 2 0 1 1 2 11 7
site1_initdis<-3
site2_initdis<-1
site3_initdis<-7
dateinfbysite<-data.frame()
dateinfbysite
data frame with 0 columns and 0 rows
for (i in 1:nrow(my.data.disonly)){ #for each colony
if(my.data.disonly$Site[i]==1){ #if in site 1
dateinfbysite[i,1]<-site1_initdis #make a new column that tracks date that infection started at each site
}
if(my.data.disonly$Site[i]==2){
dateinfbysite[i,1]<-site2_initdis
}
if(my.data.disonly$Site[i]==3){
dateinfbysite[i,1]<-site3_initdis
}
}
dateinfbysite
V1
1 3
2 3
3 3
4 3
5 3
6 3
7 3
8 3
9 3
10 3
11 3
12 3
13 3
14 3
15 3
16 3
17 3
18 3
19 3
20 3
21 3
22 3
23 3
24 3
25 3
26 3
27 3
28 3
29 1
30 1
31 1
32 1
33 1
34 1
35 1
36 1
37 1
38 1
39 1
40 1
41 1
42 1
43 1
44 1
45 1
46 1
47 1
48 1
49 1
50 1
51 1
52 7
53 7
54 7
55 7
56 7
57 7
58 7
59 7
60 7
61 7
62 7
63 7
64 7
65 7
66 7
67 7
68 7
69 7
70 7
71 7
72 7
73 7
74 7
75 7
76 7
77 7
78 7
79 7
80 7
81 7
82 7
83 7
84 7
85 7
86 7
87 7
88 7
89 7
90 7
91 7
92 7
93 7
94 7
95 7
96 7
97 7
98 7
99 7
100 7
101 7
102 7
103 7
104 7
105 7
106 7
107 7
108 7
109 7
110 7
111 7
112 7
113 7
114 7
115 7
116 7
117 7
118 7
119 7
120 7
121 7
122 7
123 7
124 7
125 7
126 7
127 7
128 7
129 7
130 7
131 7
132 7
133 7
134 7
135 7
136 7
137 7
138 7
139 7
140 7
141 7
142 7
143 7
144 7
145 7
146 7
147 7
148 7
149 7
150 7
151 7
152 7
153 7
154 7
155 7
156 7
157 7
158 7
159 7
160 7
161 7
162 7
163 7
164 7
165 7
166 7
167 7
168 7
169 7
170 7
171 7
172 7
173 7
174 7
175 7
176 7
my.data.disonly$datesiteinf<-dateinfbysite[,1] #add to major dataset
my.data.disonly$dateinfbysite<-as.numeric(my.data.disonly$dateinf)-my.data.disonly$datesiteinf #subtract the infection start date from the date each colony was infected
timedifs # came from somewhere else... saved csv
survey_date days_since_last runsum
1 X10.30.18 0 0
2 X11.9.18 10 10
3 X11.29.18 20 30
4 X12.13.18 14 44
5 X1.4.19 22 66
6 X1.18.19 14 80
7 X2.8.19 21 101
8 X3.4.19 24 125
9 X3.21.19 17 142
10 X4.11.19 21 163
11 X5.2.19 21 184
12 X5.16.19 14 198
13 X5.28.19 12 210
14 X6.13.19 16 226
15 X7.1.19 18 244
16 X7.22.19 21 265
17 X8.16.19 25 290
18 X9.17.19 32 322
19 X10.14.19 27 349
20 X11.12.19 29 378
21 X12.6.19 24 402
dateinf_days<-c()
datesiteinf_days<-c()
for (i in 1:nrow(my.data.disonly)){ #for each colony
dateinf_days[i]<-sum(timedifs$days_since_last[1:my.data.disonly$dateinf[i]])
datesiteinf_days[i]<-sum(timedifs$days_since_last[1:my.data.disonly$datesiteinf[i]])
}
#timedifs
#dateinf_days
#my.data.disonly$dateinf
#datesiteinf_days
my.data.disonly$dateinfbysite_days<-dateinf_days-datesiteinf_days
boxplot(my.data.disonly$dateinfbysite_days~my.data.disonly$Sps,na.rm=TRUE,ylab="Survey Number after")
my.data.disonly$dateinfbysite_weeks<-my.data.disonly$dateinfbysite_days/7
boxplot(my.data.disonly$dateinfbysite_weeks~my.data.disonly$Sps,na.rm=TRUE,ylab="Survey Number after")
infdate.sps.avg<-aggregate(dateinfbysite_weeks~Sps,data=my.data.disonly,FUN=mean)
infdate.sps.se<-aggregate(dateinfbysite_weeks~Sps,data=my.data.disonly,FUN=my.se)
infdate.sps.se[is.na(infdate.sps.se)]<-0
par(family="Times New Roman")
bp<-barplot(as.matrix(t(infdate.sps.avg$dateinfbysite_weeks)),ylim=c(0,60),las=1,ylab="When infected (Weeks after first infection at site)",names.arg=c(as.character(infdate.sps.avg$Sps)),las=2,col="grey")
arrows(x0=bp,x1=bp,y0=(infdate.sps.avg$dateinfbysite_weeks)-1.96*(infdate.sps.se$dateinfbysite_weeks),y1=(infdate.sps.avg$dateinfbysite_weeks)+1.96*(infdate.sps.se$dateinfbysite_weeks),code = 3, angle = 90, len = 0.02, xpd = NA)
zero-length arrow is of indeterminate angle and so skipped
infdate.sps.se[is.na(infdate.sps.se)]<-0
inftime<-ggplot(infdate.sps.avg,aes(Sps,y=dateinfbysite_weeks))+
geom_bar(stat="identity",fill="grey45")+theme(panel.background = element_blank())+
geom_errorbar(aes(ymax=infdate.sps.avg$dateinfbysite_weeks +infdate.sps.se$dateinfbysite_weeks, ymin=infdate.sps.avg$dateinfbysite_weeks-infdate.sps.se$dateinfbysite_weeks),width=0.2)+
theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+
theme(legend.position="none")+scale_x_discrete(limits=c("SSID","SINT","MCAV","DSTO","CNAT","PSTR","SBOU","OANN","OFAV","DLAB","PCLI"))+
labs(x="",y="")+
ylab(expression(paste("Time of disease onset (weeks \n after first diseased signs observed)")))+
theme(legend.position = c(0.9, 0.7) ,legend.text=element_text(size=10),legend.title=element_blank(),legend.key.size =unit(0.5,"line"))+scale_y_continuous(breaks=seq(0,50,5),expand = c(0, 0),lim=c(0,50))
inftime
median(infdate.sps.avg$dateinfbysite_weeks)
[1] 12
Goal: Visualize relationships between total disease prevalence per plot and it's relationships with coral diversity and density metrics - Shannon Diversity - Species Richness - Species Density - Colony Density
str(my.data)
'data.frame': 2012 obs. of 38 variables:
$ Site : int 1 1 1 1 1 1 1 1 1 1 ...
$ Plot : int 23 23 23 23 23 23 23 23 23 23 ...
$ Sps : chr "DLAB" "SINT" "SSID" "PPOR" ...
$ Max_width: int 11 22 25 11 18 11 11 16 10 8 ...
$ Coral_ID : chr "1_p23_t1_s0_c1_DLAB" "1_p23_t1_s0_c2_SINT" "1_p23_t1_s0_c3_SSID" "1_p23_t1_s0_c4_PPOR" ...
$ coords_x : num 0.448 0.789 0.834 0.226 0.463 ...
$ coords_y : num 0.236 0.479 0.628 1.927 2.225 ...
$ X5.1.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X6.1.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X6.21.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X7.16.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X8.17.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X10.30.18: chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X11.9.18 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X11.29.18: chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X12.13.18: chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X1.4.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X1.18.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X2.8.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X3.4.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X3.21.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X4.11.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X5.2.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X5.16.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X5.28.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X6.13.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X7.1.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X7.22.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X8.16.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X9.17.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X10.14.19: chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X11.12.19: chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ X12.6.19 : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
$ total : int 0 0 0 0 8 0 0 0 0 0 ...
$ total_bin: int 0 0 0 0 1 0 0 0 0 0 ...
$ days_dis : int 0 0 0 0 164 0 0 0 0 0 ...
$ weeks_dis: num 0 0 0 0 23.4 ...
$ glom : Factor w/ 2 levels "Healthy","SCTLD": 1 1 1 1 2 1 1 1 1 1 ...
my.data$Plot<-as.factor(my.data$Plot)
### need a df where each row is a plot and columns are species
sps.list<-(table(my.data$Plot,my.data$Sps))
sps.list
AAGA ACER CNAT DLAB DSTO MCAV MMEA MYCE OANN OCUL ODIF OFAV PAST PCLI PDIV
23 0 0 3 2 19 5 0 0 0 0 0 2 57 1 0
25 0 0 1 1 23 5 0 0 0 0 0 3 114 2 0
27 0 1 1 0 19 0 0 0 0 0 0 1 35 0 0
28 1 12 0 4 15 10 0 0 0 0 0 3 99 1 1
45 0 0 23 3 7 37 1 0 0 2 0 6 56 1 0
47 0 0 33 3 5 97 0 2 33 0 2 10 79 0 0
PPOR PSTR SBOU SINT SRAD SSID
23 7 7 0 68 0 104
25 2 4 4 97 2 160
27 0 5 4 53 0 106
28 0 3 1 53 0 119
45 0 20 22 76 3 45
47 0 16 19 56 4 111
sps.df<-rbind(sps.list[1,],sps.list[2,],sps.list[3,],sps.list[4,],sps.list[5,],sps.list[6,])
row.names(sps.df)<-c("p23","p25","p27","p28","p45","p47")
### calculate metrics
sh.div<-diversity(sps.df,index="shannon")
sp.rich<-specnumber(sps.df)
mean(sp.rich)
[1] 12.33333
my.se(sp.rich)
[1] 0.802773
evenness.J<-sh.div/specnumber(sps.df)
sps.density<-sp.rich/100 #divide by area of plot 100m^2
col.density<-rowSums(sps.list)/100
ncorals<-rowSums(sps.list)
sp.info<-cbind(sh.div,sp.rich,sps.density,evenness.J,col.density,ncorals)
sp.info
sh.div sp.rich sps.density evenness.J col.density ncorals
p23 1.625135 11 0.11 0.1477396 2.75 275
p25 1.503400 13 0.13 0.1156462 4.18 418
p27 1.421772 9 0.09 0.1579747 2.25 225
p28 1.614116 13 0.13 0.1241627 3.22 322
p45 2.095053 14 0.14 0.1496466 3.02 302
p47 2.087046 14 0.14 0.1490747 4.70 470
plotprev<-aggregate(total_bin~Plot,data=my.data,FUN=function(x) sum(x)/length(x))
plotprev
Plot total_bin
1 23 0.04727273
2 25 0.03588517
3 27 0.03555556
4 28 0.04658385
5 45 0.16225166
6 47 0.16170213
avgplotsiteprev<-(plotprev[seq(from = 1, to = NROW(plotprev), by = 2),2]+plotprev[seq(from = 2,to = NROW(plotprev), by = 2),2])/2
#make dataframe for plot-level data
colnames(plotprev)<-c("Plot","totprev")
plot.df<-cbind(plotprev,sp.info)
sitenum<-c(1,1,2,2,3,3)
plot.df<-cbind(plot.df,sitenum)
plot.df
Plot totprev sh.div sp.rich sps.density evenness.J col.density ncorals
p23 23 0.04727273 1.625135 11 0.11 0.1477396 2.75 275
p25 25 0.03588517 1.503400 13 0.13 0.1156462 4.18 418
p27 27 0.03555556 1.421772 9 0.09 0.1579747 2.25 225
p28 28 0.04658385 1.614116 13 0.13 0.1241627 3.22 322
p45 45 0.16225166 2.095053 14 0.14 0.1496466 3.02 302
p47 47 0.16170213 2.087046 14 0.14 0.1490747 4.70 470
sitenum
p23 1
p25 1
p27 2
p28 2
p45 3
p47 3
#add maxwidth
plot.df$avg.max_width<-aggregate(Max_width~Plot,FUN=mean,data=my.data)
#add cover
str(coverlong) #from percent cover analysis above
'data.frame': 600 obs. of 4 variables:
$ Site : Factor w/ 3 levels "Midchannel","Nearshore",..: 1 1 1 1 1 1 1 1 1 1 ...
$ percent.cover: num 4.17 8.7 4.17 0 0 ...
$ timept : Factor w/ 2 levels "t1","t2": 1 1 1 1 1 1 1 1 1 1 ...
$ plotnum : Factor w/ 9 levels "p23","p24","p25",..: 1 1 1 1 1 1 1 1 1 1 ...
cover.means<-aggregate(percent.cover~plotnum+timept,data=coverlong,FUN=mean)
cover<-cover.means[1:6,3]
plot.df$initcover<-cover
plot.df$avg.max_width
Plot Max_width
1 23 18.58545
2 25 17.17943
3 27 14.10222
4 28 16.10870
5 45 30.78477
6 47 33.83830
plot.data<-plot.df
plot.data$avgmaxwidth<-plot.data$avg.max_width$Max_width
div.mod<-lm(totprev~sh.div,data=plot.data)
div.plot<-ggplot(div.mod$model, aes_string(x = names(div.mod$model)[2], y = names(div.mod$model)[1])) +
geom_point(aes(color=c("Midchannel","Midchannel","Offshore","Offshore","Inshore","Inshore"))) +
stat_smooth(method = "lm", col = "red",se=TRUE,size=.5,alpha=0.1)+
theme(panel.background = element_blank())+ theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme(legend.position="None")+
labs(x="Shannon diversity",y="Total prevalence")+scale_color_manual(values=c("grey","light blue","blue"))+scale_y_continuous(breaks=seq(0,.2,.05),expand = c(0, 0),limits=c(-0.01,.25))
div.plot
dens.mod<-lm(totprev~col.density,data=plot.data)
dens.plot<-ggplot(dens.mod$model, aes_string(x = names(dens.mod$model)[2], y = names(dens.mod$model)[1])) +
geom_point(aes(color=c("Midchannel","Midchannel","Offshore","Offshore","Inshore","Inshore"))) +
stat_smooth(method = "lm", col = "red",se=TRUE,size=.5,alpha=0.1,linetype=2)+
theme(panel.background = element_blank())+ theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme(legend.position="None")+
labs(x="Colony density",y="Total prevalence",color="Site")+scale_color_manual(values=c("grey","light blue","blue"))+scale_y_continuous(breaks=seq(-.1,.3,.05),limits=c(-0.1,.3))
dens.plot
div.plot
cov.mod<-lm(totprev~initcover,data=plot.data)
cov.plot<-ggplot(cov.mod$model, aes_string(x = names(cov.mod$model)[2], y = names(cov.mod$model)[1])) +
geom_point(aes(color=c("Midchannel","Midchannel","Offshore","Offshore","Inshore","Inshore"))) +
stat_smooth(method = "lm", col = "red",se=TRUE,size=.5,alpha=0.1)+
theme(panel.background = element_blank())+ theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme(legend.position="None")+
labs(x="Initial percent coral cover",y="Total prevalence",color="Site")+scale_color_manual(values=c("grey","light blue","blue"))+scale_y_continuous(breaks=seq(0,.3,.05),expand = c(0, 0),limits=c(-0.01,.3))
cov.plot
summary(cov.mod)
Call:
lm(formula = totprev ~ initcover, data = plot.data)
Residuals:
p23 p25 p27 p28 p45 p47
-0.006897 -0.020858 -0.002318 0.003260 0.048757 -0.021944
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.030299 0.016617 1.823 0.1423
initcover 0.004782 0.001089 4.390 0.0118 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.02897 on 4 degrees of freedom
Multiple R-squared: 0.8281, Adjusted R-squared: 0.7852
F-statistic: 19.27 on 1 and 4 DF, p-value: 0.01178
size.mod<-lm(totprev~avgmaxwidth,data=plot.data)
size.plot<-ggplot(size.mod$model, aes_string(x = names(size.mod$model)[2], y = names(size.mod$model)[1])) +
geom_point(aes(color=c("Midchannel","Midchannel","Offshore","Offshore","Inshore","Inshore"))) +
stat_smooth(method = "lm", col = "red",se=TRUE,size=.5,alpha=0.1)+
theme(panel.background = element_blank())+ theme(text = element_text(family = "Times New Roman"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme(legend.position="None")+
labs(x="Average colony maximum width",y="Total prevalence",color="Site")+scale_color_manual(values=c("grey","light blue","blue"))+scale_y_continuous(breaks=seq(0,.2,.05),expand = c(0, 0),limits=c(0,.25))
size.plot
dens.plot+cov.plot+div.plot+size.plot
library(viridis)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library(corrplot)
corrplot 0.84 loaded
#get the number of each susceptible species per quadrat
num_dsto<-table(my.data$Plot,my.data$Sps=="DSTO")
num_mcav<-table(my.data$Plot,my.data$Sps=="MCAV")
num_pstri<-table(my.data$Plot,my.data$Sps=="PSTR")
num_cnats<-table(my.data$Plot,my.data$Sps=="CNAT")
num_dlab<-table(my.data$Plot,my.data$Sps=="DLAB")
num_ofav<-table(my.data$Plot,my.data$Sps=="OFAV")
num_oann<-table(my.data$Plot,my.data$Sps=="OANN")
num_sbou<-table(my.data$Plot,my.data$Sps=="SBOU")
num_sint<-table(my.data$Plot,my.data$Sps=="SINT")
num_ssid<-table(my.data$Plot,my.data$Sps=="SSID")
Pstr<-num_pstri[,2]
Cnat<-num_cnats[,2]
Dsto<-num_dsto[,2]
Mcav<-num_mcav[,2]
Dlab<-num_dlab[,2]
Ofav<-num_ofav[,2]
Oann<-num_oann[,2]
Sbou<-num_sbou[,2]
Sint<-num_sint[,2]
Ssid<-num_ssid[,2]
df<-cbind(Pstr,Cnat,Dsto,Mcav,Dlab,Ofav,Oann,Sbou,Sint,Ssid)
df
Pstr Cnat Dsto Mcav Dlab Ofav Oann Sbou Sint Ssid
23 7 3 19 5 2 2 0 0 68 104
25 4 1 23 5 1 3 0 4 97 160
27 5 1 19 0 0 1 0 4 53 106
28 3 0 15 10 4 3 0 1 53 119
45 20 23 7 37 3 6 0 22 76 45
47 16 33 5 97 3 10 33 19 56 111
norm.df<-df/rowSums(df) #normalized (by total number s sps) abundances of susceptible species at the 6 plots.
norm.df
Pstr Cnat Dsto Mcav Dlab Ofav Oann
23 0.03333333 0.014285714 0.09047619 0.02380952 0.009523810 0.009523810 0.00000000
25 0.01342282 0.003355705 0.07718121 0.01677852 0.003355705 0.010067114 0.00000000
27 0.02645503 0.005291005 0.10052910 0.00000000 0.000000000 0.005291005 0.00000000
28 0.01442308 0.000000000 0.07211538 0.04807692 0.019230769 0.014423077 0.00000000
45 0.08368201 0.096234310 0.02928870 0.15481172 0.012552301 0.025104603 0.00000000
47 0.04177546 0.086161880 0.01305483 0.25326371 0.007832898 0.026109661 0.08616188
Sbou Sint Ssid
23 0.000000000 0.3238095 0.4952381
25 0.013422819 0.3255034 0.5369128
27 0.021164021 0.2804233 0.5608466
28 0.004807692 0.2548077 0.5721154
45 0.092050209 0.3179916 0.1882845
47 0.049608355 0.1462141 0.2898172
s.sps.pca<-PCA(norm.df) #performs principle component analysis on normalized species counts dataframe
get_eigenvalue(s.sps.pca) #eigenvalues measures the amount of variation in each principle component
eigenvalue variance.percent cumulative.variance.percent
Dim.1 6.7890506 67.890506 67.89051
Dim.2 1.8998970 18.998970 86.88948
Dim.3 1.0720109 10.720109 97.60959
Dim.4 0.1288958 1.288958 98.89854
Dim.5 0.1101456 1.101456 100.00000
#scree plot
fviz_eig(s.sps.pca) #visualizes hte amount of variation explained by each pc
#extract results for variables
var<-get_pca_var(s.sps.pca)
var$contrib #contrib is the contribution in percentage of the variables to the principle components
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Pstr 8.648052 18.89401759 0.89468595 24.4582685 11.6170303
Cnat 14.075118 1.35696007 1.31498009 0.4123279 3.6534045
Dsto 13.893693 0.69630742 0.77424481 22.7942988 5.3022226
Mcav 13.726161 3.47864033 0.01050496 0.4783564 1.1840540
Dlab 0.961800 0.49268259 85.93412585 2.6546898 0.6332324
Ofav 13.894791 0.01265654 4.07308614 8.5607440 1.5768026
Oann 6.655120 26.36126338 2.96356953 0.4679873 13.5917214
Sbou 10.708350 9.73707571 3.10128100 2.5760179 46.7052768
Sint 4.258664 34.42401306 0.04469311 36.7769916 8.1466799
Ssid 13.178251 4.54638331 0.88882857 0.8203179 7.5895756
corrplot(var$contrib,is.corr=FALSE)
fviz_cos2(s.sps.pca,choice="var",axes=1:2)
So Dlab less important for explaining most of the variation among quadrats
fviz_contrib(s.sps.pca,choice="var",axes=1)
fviz_contrib(s.sps.pca,choice="var",axes=2)
fviz_pca_var(s.sps.pca,alpha.var="contrib",gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
dimdesc(s.sps.pca,axes=c(1,2),proba=0.05)
$Dim.1
$quanti
correlation p.value
Cnat 0.9775310 0.0007516111
Ofav 0.9712489 0.0012280570
Mcav 0.9653373 0.0017814340
Sbou 0.8526402 0.0309724134
Ssid -0.9458743 0.0043151102
Dsto -0.9712105 0.0012313229
attr(,"class")
[1] "condes" "list "
$Dim.2
named list()
attr(,"class")
[1] "condes" "list "
$call
$call$num.var
[1] 1
$call$proba
[1] 0.05
$call$weights
[1] 1 1 1 1 1 1
$call$X
Dim.1 Pstr Cnat Dsto Mcav Dlab Ofav
23 -1.733028 0.03333333 0.014285714 0.09047619 0.02380952 0.009523810 0.009523810
25 -1.995877 0.01342282 0.003355705 0.07718121 0.01677852 0.003355705 0.010067114
27 -2.268308 0.02645503 0.005291005 0.10052910 0.00000000 0.000000000 0.005291005
28 -1.315356 0.01442308 0.000000000 0.07211538 0.04807692 0.019230769 0.014423077
45 3.396305 0.08368201 0.096234310 0.02928870 0.15481172 0.012552301 0.025104603
47 3.916264 0.04177546 0.086161880 0.01305483 0.25326371 0.007832898 0.026109661
Oann Sbou Sint Ssid
23 0.00000000 0.000000000 0.3238095 0.4952381
25 0.00000000 0.013422819 0.3255034 0.5369128
27 0.00000000 0.021164021 0.2804233 0.5608466
28 0.00000000 0.004807692 0.2548077 0.5721154
45 0.00000000 0.092050209 0.3179916 0.1882845
47 0.08616188 0.049608355 0.1462141 0.2898172
#This function is designed to point out the variables and the categories that are the most characteristic according to each dimension obtained by a Factor Analysis.
var<-get_pca_var(s.sps.pca)
var$contrib
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Pstr 8.648052 18.89401759 0.89468595 24.4582685 11.6170303
Cnat 14.075118 1.35696007 1.31498009 0.4123279 3.6534045
Dsto 13.893693 0.69630742 0.77424481 22.7942988 5.3022226
Mcav 13.726161 3.47864033 0.01050496 0.4783564 1.1840540
Dlab 0.961800 0.49268259 85.93412585 2.6546898 0.6332324
Ofav 13.894791 0.01265654 4.07308614 8.5607440 1.5768026
Oann 6.655120 26.36126338 2.96356953 0.4679873 13.5917214
Sbou 10.708350 9.73707571 3.10128100 2.5760179 46.7052768
Sint 4.258664 34.42401306 0.04469311 36.7769916 8.1466799
Ssid 13.178251 4.54638331 0.88882857 0.8203179 7.5895756
km <- kmeans(var$coord, centers = 2, nstart = 25)
grp <- as.factor(km$cluster)
nice.biplot<-fviz_pca_biplot(s.sps.pca,
# Fill individuals by groups
#geom.ind = "point",
pointshape = 21,
pointsize = 2.5,
mean.point=FALSE,
fill.ind = as.factor(c("Midchannel","Midchannel","Offshore","Offshore","Nearshore","Nearshore")),
col.ind = "black",
# Color variable by groups
col.var = grp,
alpha.var ="contrib",
legend.title = list(fill = "Site", alpha="Contribution",color="Cluster"),
repel=TRUE,
geom.ind=c("point","text"),
axes.linetype="dashed") +
ggpubr::fill_palette(c("blue","grey","light blue"))+
ggpubr::color_palette(c("dark grey","black"))
nice.biplot
right.biplot<-ggpubr::ggpar(nice.biplot,
title="",
ggtheme=theme_classic(),legend="right",ylab="PC 2",xlab="PC 1")
good.biplot<-right.biplot+
theme(panel.background = element_blank())+ theme(text = element_text(family = "Times New Roman"))
tiff("Figure5.tiff",width=180, height=210,units="mm",res=300)
patch<-(dens.plot/div.plot) | (cov.plot/size.plot)
(patch | good.biplot) + plot_annotation(tag_levels="a")+ plot_layout(widths=c(1.5,1.5,3))+theme(text = element_text(family = "Times New Roman",size=12))
dev.off()
null device
1
tiff("Figure5.tiff",width=180, height=300,units="mm",res=300)
#patch<-(dens.plot/div.plot) | (cov.plot/size.plot)
(patch / good.biplot) + plot_annotation(tag_levels="A")+ plot_layout(widths=c(1.5,1.5,3))+theme(text = element_text(family = "Times New Roman",size=12))
dev.off()
null device
1
Prevalence, Panel A
Percent tissue loss per week, panel B
Infection timing, panel C
Size and disease panel D
tiff("Figure4.tiff",width=85, height=260,units="mm",res=300)
(prevplot/progrates/inftime/size.sp.plot)+ plot_annotation(tag_levels="A")
dev.off()
null device
1